Combine environmental data from SMHI and Copernicus

Author

Viktor Thunell

Published

October 30, 2024

Introduction

This script takes netcdf files with spatiotemporal environmental data and transforms these to a suitable format for our analysis.

We add environmental information to the stomach observations (see 03-add-env-data-to-stomachs) and use these as covariates in the models and in prediction (see 04-make-pred-grid).

The data comes from… * Bottom oxygen, temperature and salinity is from spatiotemporal simulated data available from the Copernicus and the Swedish Meteorological and Hydrological Institute (SMHI). * From Copernicus, we use the Baltic sea reanalysis (for years 1993-2021) and the analysis and forecast (for 2022-2023) of Nemo for salinity and temperature and the biogeochemical model ERGOM. * From SMHI we used hindcast simulations (1961-2017) of the NEMO–SCOBI model (NEMO-Nordic coupled to the Swedish Coastal and Ocean Biogeochemical model SCOBI, https://doi.org/10.5194/bg-2023-116).

Below, the two data sets are referred to as Copernicus and hindcast.

Load libraries

# Packages used in this script. "tidylog" is included because
# conflicts_prefer() below registers the tidylog verbs and the script relies
# on their row-count logging; the previous vector also listed "ncdf4" twice.
pkgs <- c("tidyverse", "sp", "raster", "devtools", "RCurl", "sdmTMB",
          "viridis", "terra", "ncdf4", "chron", "tidync", "patchwork",
          "conflicted", "tidylog")

# Install any missing packages, then attach them all.
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, dependencies = TRUE)
}

invisible(lapply(pkgs, library, character.only = TRUE))

# Source code for map plots
devtools::source_url("https://raw.githubusercontent.com/VThunell/Lammska_cod-fr/main/R/functions/map-plot.R")
# Packages not on CRAN
# devtools::install_github("seananderson/ggsidekick") # not on CRAN
library(ggsidekick)
theme_set(theme_sleek())

# Project root path
home <- here::here()

# Prefer the logging (tidylog) versions of common verbs so each pipeline step
# reports how many rows it affected; summarise stays plain dplyr.
conflicts_prefer(tidylog::mutate, tidylog::filter, tidylog::slice_sample,
                 tidylog::drop_na, dplyr::summarise, tidylog::distinct)

1. NEMO hindcast data 1961-2017 (bottom oxygen, temperature and salinity)

The SMHI hindcast data covering the Baltic and our variables of interest comes as one netcdf file per month (12 per year). Here we retrieve the info we need from each netcdf file and combine them into a dataframe.

netcdf files often have lat and long as dimensions covering space (and sometimes depth and time), which makes it easy to go from arrays to 2-dimensional matrices or data frames, which our models rely on. These files don't: lat and lon are instead 2d variables and the grid dimensions (x, y) are just integer-valued indices. So we have to fiddle with them a bit (join the coordinates back in from the same file) to extract the space and depth for each time slice (month and year).

# library(tidyterra) # due to conflicting function names in tidyterra and dplyr we use this only here
# # Get the file paths for each netcdf
# path <- paste0(home, "/data/hindcastRunToSLU/20240701_DataDelivery_SMHI_SLU")
# filepaths <- list.files(path, full.names=TRUE)
# 
# # turn filepath name ending into corresponding year and month and check if any years do not contain 12 months
# as.data.frame(filepaths) |> 
#   mutate(year = as.factor(str_sub(filepaths, start = 99, end=102)),
#          month = as.factor(str_sub(filepaths, start = 103, end=104))) |>
#   summarise(n=n(), .by = year)
# 
# f_envdat <- function(file)  {
#   a <- tidync(file) |>
#     hyper_tibble()
# 
#   b <- tidync(file) |> 
#     activate( "D3,D2" ) |> # grid identifiers
#     hyper_tibble()
# 
#   c <-left_join(a, b, by = c("x","y")) |> 
#     filter(between(nav_lat, 54, 60) & between(nav_lon, 12.5, 24.5), # reduce extent from coord in stomach data
#            !vosaline == 0 ) |> # to filter out non-existing values, could also be oxy or votemper
#     filter(depth == max(depth), .by= c(nav_lat,nav_lon)) |>
#     mutate(year = str_sub(file, start = 99, end=102),
#            month = str_sub(file, start = 103, end=104))
#   }
# 
# time <- Sys.time() # 35 min!!!
# 
# hindenv_df <- filepaths |>
#   map(\(x) f_envdat(x)) |>
# list_rbind()
# 
# Sys.time() - time
# 
# hindenv_df <- hindenv_df |>
#   dplyr::select(-x,-y) |>
#   rename(lat = nav_lat,
#          lon = nav_lon,
#          sal = vosaline,
#          temp = votemper) |>
#   mutate(month = as.integer(month),
#          year = as.integer(year))
# 
# saveRDS(hindenv_df, file = paste0(home, "/data/environment/hindcast_1961_2017.rds"))

2. Copernicus reanalysis and forecast 1993-2024 (bottom oxygen, temperature and salinity)

# # Oxygen
# 
# # Source: 
# # forecast, cmems_mod_bal_bgc_anfc_P1M-m_1723620449355.nc
# # https://data.marine.copernicus.eu/product/BALTICSEA_ANALYSISFORECAST_BGC_003_007/download?dataset=cmems_mod_bal_bgc_anfc_P1M-m_202311
# 
# # reanalysis, cmems_mod_bal_bgc_my_P1M-m_1723620645492.nc
# # https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_BGC_003_012/download?dataset=cmems_mod_bal_bgc_my_P1M-m_202303
# 
# # forecast
# oxy_df1_fore <- tidync(paste0(home, "/data/NEMO/cmems_mod_bal_bgc_anfc_P1M-m_1723620449355.nc")) |>
#   hyper_tibble() |>
#   mutate(date = as_datetime(time, origin = '1970-01-01'),
#          month = month(date),
#          day = day(date),
#          year = year(date),
#          month_year = paste(month, year, sep = "_")) |>
#   mutate(oxy = mean(o2b, na.rm = TRUE), .by = c(month_year, latitude, longitude)) |>
#   mutate(oxy = oxy * 0.0223909) # conversion factor from mmol/m3 to ?
# # the hindcast data is averaged by month, so we do the same here even though it seems like its only one obs per month.
# 
# # reanalysis
# oxy_df1_rean <- tidync(paste0(home, "/data/NEMO/cmems_mod_bal_bgc_my_P1M-m_1723620645492.nc")) |>
#   hyper_tibble() |>
#   mutate(date = as_datetime(time, origin = '1970-01-01'),
#          month = month(date),
#          day = day(date),
#          year = year(date),
#          month_year = paste(month, year, sep = "_")) |>
#   mutate(oxy = mean(o2b, na.rm = TRUE), .by = c(month_year, latitude, longitude)) |>
#   mutate(oxy = oxy * 0.0223909) # conversion factor from mmol/m3 to ?
# # the hindcast data is averaged by month, so we do the same here even though it seems like its only one obs per month.
# 
# oxy_df <- bind_rows(oxy_df1_rean, oxy_df1_fore)  |>
#   dplyr::select(month_year, latitude, longitude, oxy, day, month, year)
# 
# # Temperature and salinity
# 
# # source 
# # forecast cmems_mod_bal_phy_anfc_P1M-m_1723620759845.nc
# # https://data.marine.copernicus.eu/product/BALTICSEA_ANALYSISFORECAST_PHY_003_006/download?dataset=cmems_mod_bal_phy_anfc_P1M-m_202311
# # reanalysis cmems_mod_bal_phy_my_P1M-m_1723620956814.nc
# # https://data.marine.copernicus.eu/product/BALTICSEA_MULTIYEAR_PHY_003_011/download
# 
# # forecast
# tempsal_df1_fore <- tidync(paste0(home, "/data/NEMO/cmems_mod_bal_phy_anfc_P1M-m_1723620759845.nc")) |>
#   hyper_tibble() |>
#   mutate(date = as_datetime(time, origin = '1970-01-01'),
#          month = month(date),
#          day = day(date),
#          year = year(date),
#          month_year = paste(month, year, sep = "_")) |>
#   mutate(sal = mean(sob, na.rm = TRUE), .by = c(month_year, latitude, longitude)) |>
#   mutate(temp = mean(bottomT, na.rm = TRUE), .by = c(month_year, latitude, longitude))
# 
# #sum(ncvar_get(ncin,"bottomT") == -999, na.rm=TRUE) # no fill values to replace
# 
# # reanalysis
# tempsal_df1_rean <- tidync(paste0(home, "/data/NEMO/cmems_mod_bal_phy_my_P1M-m_1723620956814.nc")) |>
#   hyper_tibble() |>
#   mutate(date = as_datetime(time, origin = '1970-01-01'),
#          month = month(date),
#          day = day(date),
#          year = year(date),
#          month_year = paste(month, year, sep = "_")) |>
#   mutate(sal = mean(sob, na.rm = TRUE), .by = c(month_year, latitude, longitude)) |>
#   mutate(temp = mean(bottomT, na.rm = TRUE), .by = c(month_year, latitude, longitude))
# 
# tempsal_df <- bind_rows(tempsal_df1_rean, tempsal_df1_fore) |>
#   dplyr::select(month_year, latitude, longitude, sal, temp, day, month, year)
# 
# # join env variables
# copenv_df <-
#   left_join(oxy_df, tempsal_df) |>
#   rename(lat = latitude,
#          lon = longitude)
# 
# saveRDS(copenv_df, file = paste0(home, "/data/environment/copernicus_1993_2024.rds"))
#
# detach("package:tidyterra") # as this conflicts with dplyr function names and we're done with tidyterra

3. Combine hindcast and Copernicus data

Combine SMHI and Copernicus in a df

# Load the SMHI hindcast (1961-2017) and tag each row with its source model
hindenv_df <- readRDS(file = paste0(home, "/data/environment/hindcast_1961_2017.rds"))

hindenv_df_2 <- hindenv_df |>
  mutate(model = factor("hindcast"))

# Load the Copernicus data. NOTE: section 2 saves this file as
# "copernicus_1993_2024.rds" (see the saveRDS call there); the previous path
# "copernicus_1993_2021.rds" pointed at a stale file from an earlier run, so
# the filename is corrected here to match what this script produces.
copenv_df <- readRDS(file = paste0(home, "/data/environment/copernicus_1993_2024.rds"))

copenv_df_2 <- copenv_df |>
  # reduce extent based on coords in stomach data (same box as the hindcast)
  filter(between(lat, 54, 60) & between(lon, 12.5, 24.5)) |>
  mutate(model = factor("copernicus"))
filter: removed 6,720,588 rows (19%), 28,860,240 rows remaining
# copenv_df_2 |>
#   filter(year %in% c(2017:2022),
#          month ==3) |>
#   ggplot(aes(lon, lat, fill = temp)) +
#   geom_raster() +
#   facet_wrap(~year, nrow = 2) +
#   scale_fill_viridis()
  
str(copenv_df_2)
tibble [28,860,240 × 10] (S3: tbl_df/tbl/data.frame)
 $ month_year: chr [1:28860240] "1_1993" "1_1993" "1_1993" "1_1993" ...
 $ lat       : num [1:28860240] 54 54 54 54 54 ...
 $ lon       : num [1:28860240] 13.8 13.8 13.8 13.9 13.9 ...
 $ oxy       : num [1:28860240] 9.01 8.95 8.93 8.81 8.79 ...
 $ day       : int [1:28860240] 1 1 1 1 1 1 1 1 1 1 ...
 $ month     : num [1:28860240] 1 1 1 1 1 1 1 1 1 1 ...
 $ year      : num [1:28860240] 1993 1993 1993 1993 1993 ...
 $ sal       : num [1:28860240] 5.22 4.98 5.04 5.15 5.34 ...
 $ temp      : num [1:28860240] 0.808 0.819 0.848 0.879 0.856 ...
 $ model     : Factor w/ 1 level "copernicus": 1 1 1 1 1 1 1 1 1 1 ...
str(hindenv_df_2)
tibble [12,517,200 × 13] (S3: tbl_df/tbl/data.frame)
 $ temp    : num [1:12517200] 0.547 0.557 0.575 1.235 0.589 ...
 $ sal     : num [1:12517200] 8.16 8.21 8.24 7.88 8.31 ...
 $ oxy     : num [1:12517200] 9.47 9.48 9.48 9.21 9.47 ...
 $ hypoxia1: num [1:12517200] 0 0 0 0 0 0 0 0 0 0 ...
 $ hypoxia2: num [1:12517200] 0 0 0 0 0 0 0 0 0 0 ...
 $ anoxia  : num [1:12517200] 0 0 0 0 0 0 0 0 0 0 ...
 $ depth   : num [1:12517200] 7.54 7.54 7.54 7.54 7.54 ...
 $ time    : num [1:12517200] 1.93e+09 1.93e+09 1.93e+09 1.93e+09 1.93e+09 ...
 $ lat     : num [1:12517200] 54.2 54.2 54.2 54.2 54.2 ...
 $ lon     : num [1:12517200] 13.5 13.5 13.6 13.8 13.4 ...
 $ year    : int [1:12517200] 1961 1961 1961 1961 1961 1961 1961 1961 1961 1961 ...
 $ month   : int [1:12517200] 1 1 1 1 1 1 1 1 1 1 ...
 $ model   : Factor w/ 1 level "hindcast": 1 1 1 1 1 1 1 1 1 1 ...
# Stack the hindcast and Copernicus data frames into one dataset, keeping
# only the columns used downstream (coordinates, covariates, time, source).
env_df <- bind_rows(hindenv_df_2, copenv_df_2) |>
  dplyr::select(lat, lon, oxy, temp, sal, month, year, model)

# Count missing values per column -- sal and temp contain NAs!
map(env_df, \(col) sum(is.na(col)))
$lat
[1] 0

$lon
[1] 0

$oxy
[1] 0

$temp
[1] 318

$sal
[1] 208

$month
[1] 0

$year
[1] 0

$model
[1] 0

Plot environmental covariates for march and a few selected years

# Bottom oxygen in March (the most common month in the stomach data) for four
# example years, faceted by source model and year.
oxy_march <- env_df |>
  filter(month == 3,
         lon > 13.5,
         year %in% c(1993, 1999, 2007, 2014))

ggplot(oxy_march, aes(lon, lat, fill = oxy)) +
  geom_raster() +
  facet_wrap(model ~ year, nrow = 2)
filter: removed 41,014,352 rows (99%), 363,088 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

# Bottom temperature in March for the same example years, by model and year.
temp_march <- env_df |>
  filter(month == 3,
         lon > 13.5,
         year %in% c(1993, 1999, 2007, 2014))

ggplot(temp_march, aes(lon, lat, fill = temp)) +
  geom_raster() +
  facet_wrap(model ~ year, nrow = 2) +
  scale_fill_viridis()
filter: removed 41,014,352 rows (99%), 363,088 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

# Bottom salinity in March for the same example years, by model and year.
sal_march <- env_df |>
  filter(month == 3,
         lon > 13.5,
         year %in% c(1993, 1999, 2007, 2014))

ggplot(sal_march, aes(lon, lat, fill = sal)) +
  geom_raster() +
  facet_wrap(model ~ year, nrow = 2) +
  scale_fill_viridis(option = "magma")
filter: removed 41,014,352 rows (99%), 363,088 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

4. Model spatial differences between Copernicus and SMHI-hindcast and predict the Hindcast for 2018-2024

Oxygen

To account for consistent differences between the Copernicus Baltic reanalysis and forecast and the SMHI Hindcast, we model the spatial and temporal variation between the models and predict the years that are missing from the Hindcast (2018-2024).

# There are differences between the data but in these plots, the main diff is that the hindcast contains a larger range of negative values. The "dip" in the distribution is still around 5.
# NOTE(review): slice_sample() runs before the model filter, so the per-model
# sample sizes are unequal (proportional to each model's share of env_df).
# The trailing "+" after labs() joins the two ggplot objects side by side via
# patchwork (loaded above) -- do not remove the blank continuation line.
env_df |>
  slice_sample( n = 100000) |>
  filter(model == "copernicus") |>
  ggplot(aes(x=oxy)) + 
  geom_histogram() +
  labs(title = "Copernicus") +
  
env_df |>
  slice_sample( n = 100000) |>
  filter(model == "hindcast") |>
  ggplot(aes(x=oxy)) + 
  geom_histogram() +
  labs(title = "hindcast")
filter: removed 30,064 rows (30%), 69,936 rows remaining
filter: removed 69,817 rows (70%), 30,183 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Model data: keep 1993+ (years both sources cover), subsample, and add a
# continuous time index plus UTM coordinates for the SPDE mesh.
env_df_2 <- env_df |>
  filter(year > 1992) |>
  slice_sample( n = 100000) |> # sample data as it is too much data to model
  mutate(yearmonth = ((year-1963)*12)+month) |> # a continuous time predictor for the AR process
  # group_by(model) |> # for equal sampling between groups
  # sample_n(50000) |>
  # ungroup() |>
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 7,027,200 rows (17%), 34,350,240 rows remaining
# SPDE mesh on the UTM-projected sample locations; cutoff = 5 is the minimum
# distance between vertices in the X/Y units (presumably km -- confirm).
mesh <- make_mesh(env_df_2, c("X", "Y"), cutoff = 5)
This mesh has > 1000 vertices. Mesh complexity has the single largest influence
on fitting speed. Consider whether you require a mesh this complex, especially
for initial model exploration. Check `your_mesh$mesh$n` to view the number of
vertices.
plot(mesh)

# Candidate models for the between-source offset in bottom oxygen. All use
# "0 + model" so each data source gets its own intercept, and
# spatial_varying = ~ 0 + model gives each source its own spatial field.

# Model 1: spatially varying model effect, year as factor
Mod_oxy1 <- 
  sdmTMB(
  data = env_df_2 ,
  formula = oxy ~ 0 + model + as.factor(year),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 2: single shared spatial GRF, both year and month as factor predictors
Mod_oxy2 <- 
  sdmTMB(
  data = env_df_2,
  formula = oxy ~ 0 + model + as.factor(year) + as.factor(month),
  mesh = mesh,
  spatial = "on",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 3: spatially varying model effect, both year and month as factors
Mod_oxy3 <- 
  sdmTMB(
  data = env_df_2,
  formula = oxy ~ 0 + model + as.factor(year) + as.factor(month),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 4: time-varying AR1 intercept over the continuous yearmonth index
# plus spatially varying model effect.
# NOTE(review): extra_time = 719 pads a yearmonth missing from the data
# (719 = (2022-1963)*12 + 11, i.e. Nov 2022? -- confirm against the data).
Mod_oxy4 <-
  sdmTMB(
  data = env_df_2, 
  formula = oxy ~ 0 + model,
  mesh = mesh,
  time_varying_type = "ar1",
  time_varying = ~1,
  time = "yearmonth",
  extra_time = c(719),
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

sanity(Mod_oxy1)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_oxy2)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_oxy3)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_oxy4)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
AIC(Mod_oxy1)
[1] 298060.4
AIC(Mod_oxy2)
[1] 286757.5
AIC(Mod_oxy3)
[1] 274121.5
AIC(Mod_oxy4)
[1] 274446.8

Models 3 and 4 have the lowest and very similar AIC. Which of these predicts oxygen best for years withheld from the model (2007-2012 used below), and how do the hindcast-predicted years 2018-2024 compare to the Copernicus forecast? From here we only use data from 2001 onwards to speed things up a bit.

# Remove hindcast data for 2007-2012 (and pre-2001 data) so those years can be
# used to validate the refitted models; rebuild the mesh on the reduced data.
mesh <- make_mesh(env_df_2 |> filter(year > 2000) |> filter(!(year %in% 2007:2012 & model == "hindcast")), c("X", "Y"), cutoff = 4)
filter: removed 26,419 rows (26%), 73,581 rows remaining
filter: removed 3,743 rows (5%), 69,838 rows remaining
This mesh has > 1000 vertices. Mesh complexity has the single largest influence
on fitting speed. Consider whether you require a mesh this complex, especially
for initial model exploration. Check `your_mesh$mesh$n` to view the number of
vertices.
# Refit of model 3 on 2001+ data with hindcast 2007-2012 withheld, so its
# predictions for those years can be checked against the held-out data.
Mod_oxy3b <- 
  sdmTMB(
  data = env_df_2 |> filter(year > 2000) |> filter(!(year %in% 2007:2012 & model == "hindcast")),
  formula = oxy ~ 0 + model + as.factor(year) + as.factor(month),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)
filter: removed 26,419 rows (26%), 73,581 rows remaining
filter: removed 3,743 rows (5%), 69,838 rows remaining
# Refit of model 4 (AR1 time-varying intercept) on the same reduced data.
Mod_oxy4b <- 
  sdmTMB(
  data = env_df_2 |> filter(year > 2000) |> filter(!(year %in% 2007:2012 & model == "hindcast")),
  formula = oxy ~ 0 + model, # + as.factor(year) + as.factor(month)
  mesh = mesh,
  time_varying_type = "ar1",
  time_varying = ~1,
  time = "yearmonth",
  extra_time = c(719), # pad the yearmonth missing from the data
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)
filter: removed 26,419 rows (26%), 73,581 rows remaining
filter: removed 3,743 rows (5%), 69,838 rows remaining
# Extend the data with the years missing from the hindcast and predict there.

# Model 3 newdata: every unique hindcast grid point replicated for each year
# 2001-2023, with month fixed to March (the most common stomach-data month).
env_df_predhind3 <- env_df |>
  filter( model == "hindcast") |>
  distinct(lat, lon) |>
  replicate_df(time_name = "year", time_values = 2001:2023) |>
  mutate( model = as_factor("hindcast"),
          month = 3) |>
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
oxy_predhind3 <- predict(Mod_oxy3b, newdata = env_df_predhind3)

# Model 4 newdata: same grid, but time is the continuous yearmonth index;
# the seq() picks March of every year 2001-2023 (steps of 12 months).
env_df_predhind4 <- env_df |>
  filter( model == "hindcast") |>
  dplyr::distinct(lat, lon) |>
  replicate_df(time_name = "yearmonth", time_values = seq((2001-1963)*12+3,(2023-1963)*12+3, by=12)) |> # for march every year
  mutate( model = as_factor("hindcast")) |>
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
oxy_predhind4 <- predict(Mod_oxy4b, newdata = env_df_predhind4)

# Compare models 3 and 4: yearly mean predictions against the observed March
# means per source; vertical lines mark the withheld period 2007-2012.
# For model 4, year is recovered from yearmonth (exact for March values,
# since yearmonth = (year-1963)*12 + 3 there).
bind_rows(oxy_predhind3 |> mutate(Mod ="Mod_oxy3"), oxy_predhind4 |> mutate(Mod = "Mod_oxy4", year = 1963+(yearmonth-3)/12)) |>
  mutate(mean_est = mean(est), .by = c(year, Mod)) |>
  filter(year > 1990) |>
  ggplot() +
  geom_line(aes(year, mean_est, color = as.factor(Mod))) +
  geom_line(data = env_df |> filter(month == 3, year > 1990) |> summarise(oxy = mean(oxy), .by = c(year,model)), aes(x = year, y = oxy, linetype = model)) +
  geom_vline( xintercept = c(2007,2012)) 
filter: no rows removed
filter: removed 38,453,004 rows (93%), 2,924,436 rows remaining

While models 3 and 4 differ in their predictions for 2007-2012, the differences are small. For years after 2018 they are even more similar.

Now we compare spatial differences model 3 and 4 for oxygen.

# In-sample predictions from the full-data fits of models 3 and 4.
oxy_predb3 <- predict(Mod_oxy3)
oxy_predb4 <- predict(Mod_oxy4)

# Mean absolute data-minus-prediction difference per decade and model.
# NOTE(review): .by has no effect on the first mutate since diff is row-wise.
# round(year/10)*10 maps years to the NEAREST decade (e.g. 2016 -> 2020).
bind_rows(oxy_predb3 |> mutate(oxymod="Mod_oxy3"), oxy_predb4 |> mutate(oxymod="Mod_oxy4")) |>
  mutate( diff = oxy - est, .by = c(year, model)) |>
  mutate(decade = round(year/10) * 10) |>
  summarise(decadaldiffs = mean(abs(diff)), .by = c(decade, oxymod)) |>
  arrange(decade) |>
  filter(decade > 1990)
filter: removed 2 rows (25%), 6 rows remaining
# A tibble: 6 × 3
  decade oxymod   decadaldiffs
   <dbl> <chr>           <dbl>
1   2000 Mod_oxy3        0.655
2   2000 Mod_oxy4        0.651
3   2010 Mod_oxy3        0.655
4   2010 Mod_oxy4        0.652
5   2020 Mod_oxy3        0.675
6   2020 Mod_oxy4        0.671
# Map the per-point data-minus-prediction differences for the hindcast, by
# decade and model, to compare the two candidates spatially.
bind_rows(oxy_predb3 |> mutate(oxymod="Mod_oxy3"), oxy_predb4 |> mutate(oxymod="Mod_oxy4")) |>
  mutate( diff = oxy - est, .by = c(year, model)) |>
  mutate(decade = round(year/10) * 10) |>
  filter( model == "hindcast",
          decade > 1990) |>
  ggplot(aes(lon,lat, color = diff)) +
  geom_point(size=0.1) +
  facet_wrap(decade~oxymod, ncol = 2) +
  scale_colour_viridis(option= "turbo")
filter: removed 170,528 rows (85%), 29,472 rows remaining

Despite being very similar, the model with an AR1 process produces a lower mean spatial difference. Visually they are indistinguishable, I think. The AR1 model is the winner as it has lower spatial diffs (note though that its AIC above was marginally higher than model 3's).

# Selected oxygen model: predict from the full-data AR1 fit on the March grid.
oxy_predhind4 <- predict(Mod_oxy4, newdata = env_df_predhind4)

# Plot yearly mean of the modelled oxygen (newdata covers March 2001-2023)
# against the observed March means per source.
oxy_predhind4 |> mutate(Mod = "Mod_oxy4", year = 1963+(yearmonth-3)/12) |>
  mutate(mean_est = mean(est), .by = c(year)) |>
  ggplot() +
  geom_line(aes(year, mean_est, linetype = Mod)) +
  geom_line(data = env_df |> filter(month == 3, year > 1990) |> summarise(oxy = mean(oxy), .by = c(year,model)), aes(x = year, y = oxy, color = model)) +
  labs(title = "Oxygen model")
filter: removed 38,453,004 rows (93%), 2,924,436 rows remaining

# Spatial check for March 2005: hindcast data next to the modelled field.
# floor(1963 + yearmonth/12) recovers the year exactly for March rows.
env_df |>
  filter(year %in% c(2005),
         month == 3,
         model == "hindcast") |>
  mutate(source="data") |>
  bind_rows(oxy_predhind4 |>
              mutate(year = floor(1963+(yearmonth/12)),
                     source="modelled",
                     oxy=est) |>
              filter(year %in% c(2005))) |>
  ggplot(aes(lon,lat, fill = oxy)) +
  geom_raster() +
  facet_wrap(~source) +
  scale_fill_viridis()
filter: removed 41,359,140 rows (>99%), 18,300 rows remaining
filter: removed 402,600 rows (96%), 18,300 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

Salinity

We skip corresponding oxygen model 2 for salinity here

# Compare the salinity distributions of the two sources; the trailing "+"
# after labs() joins the two ggplot objects side by side via patchwork.
env_df |>
  filter(model == "copernicus") |>
  ggplot(aes(x=sal)) + 
  geom_histogram() +
  labs(title = "Copernicus") +
  
env_df |>
  filter(model == "hindcast") |>
  ggplot(aes(x=sal)) + 
  geom_histogram() +
  labs(title = "Hindcast")
filter: removed 12,517,200 rows (30%), 28,860,240 rows remaining
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 208 rows containing non-finite outside the scale range
(`stat_bin()`).
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Salinity model data: 1993+, NAs in sal dropped (208 rows, per the NA count
# above), subsampled, with continuous time index and UTM coordinates added.
env_df_2 <- env_df |>
  filter(year > 1992) |>
  drop_na(sal) |>
  slice_sample( n = 100000) |> # sample data as it is too much data to model
  mutate(yearmonth = ((year-1963)*12)+month) |> # a continuous time predictor for the AR process
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 7,027,200 rows (17%), 34,350,240 rows remaining
summary(env_df_2$sal)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
 0.00085  7.02648  8.12862  8.80050 10.27334 34.61896 
# Rebuild the SPDE mesh on the salinity sample (same 5-unit cutoff as for oxygen)
mesh <- make_mesh(env_df_2, c("X", "Y"), cutoff = 5)
This mesh has > 1000 vertices. Mesh complexity has the single largest influence
on fitting speed. Consider whether you require a mesh this complex, especially
for initial model exploration. Check `your_mesh$mesh$n` to view the number of
vertices.
# Salinity models mirror the oxygen models 1, 3 and 4 (model 2 is skipped).

# Model 1: spatially varying model effect, year as factor
Mod_sal1 <- 
  sdmTMB(
  data = env_df_2,
  formula = sal ~ 0 + model + as.factor(year),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 3: spatially varying model effect, year and month as factor predictors
Mod_sal3 <- 
  sdmTMB(
  data = env_df_2,
  formula = sal ~ 0 + model + as.factor(year) + as.factor(month),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 4: time-varying AR1 intercept over yearmonth plus spatially varying
# model effect (same structure as Mod_oxy4).
Mod_sal4 <- 
  sdmTMB(
  data = env_df_2, 
  formula = sal ~ 0 + model,
  mesh = mesh,
  time_varying_type = "ar1",
  time_varying = ~1,
  time = "yearmonth",
  extra_time = c(719), # add the month that is missing
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

sanity(Mod_sal1)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_sal3)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_sal4)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
AIC(Mod_sal1)
[1] 230102.6
AIC(Mod_sal3)
[1] 229924.8
AIC(Mod_sal4)
[1] 228898.2

The AR1 model is best for salinity as well based on AIC.

# Predict salinity on the hindcast March grid (env_df_predhind4, built in the
# oxygen section). The resulting df is used to extract values to the pred
# grid instead of predicting on the pred grid directly.
sal_predhind <- predict(Mod_sal4, newdata = env_df_predhind4)

# Plot yearly mean modelled salinity against observed March means per source.
sal_predhind |> mutate(Mod = "Mod_sal4", year = floor(1963+(yearmonth/12))) |>
  mutate(mean_est = mean(est), .by = c(year)) |>
  ggplot() +
  geom_line(aes(year, mean_est, linetype = Mod)) +
  geom_line(data = env_df |> filter(month == 3, year > 1990) |> summarise(sal = mean(sal), .by = c(year,model)), aes(x = year, y = sal, color = model)) +
  labs(title = "Salinity model")
filter: removed 38,453,004 rows (93%), 2,924,436 rows remaining

# Spatial check for March 2005: hindcast salinity data next to the modelled
# field, restricted to lon > 13.5 in both panels.
env_df |>
  filter(year %in% c(2005),
         month == 3,
         model == "hindcast",
         lon > 13.5) |>
  mutate(source="data") |>
  bind_rows(sal_predhind |>
              mutate(year = floor(1963+(yearmonth/12)),
                     source="modelled",
                     sal=est) |>
              filter(year %in% c(2005),
                     lon > 13.5)) |>
  ggplot(aes(lon,lat, fill = sal)) +
  geom_raster() +
  facet_wrap(~source) +
  scale_fill_viridis()
filter: removed 41,359,744 rows (>99%), 17,696 rows remaining
filter: removed 403,204 rows (96%), 17,696 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

Temperature

# Compare the temperature distributions of the two sources; the trailing "+"
# after labs() joins the two ggplot objects side by side via patchwork.
env_df |>
  filter(model == "copernicus") |>
  ggplot(aes(x=temp)) + 
  geom_histogram() +
  labs(title = "Copernicus") +
  
env_df |>
  filter(model == "hindcast") |>
  ggplot(aes(x=temp)) + 
  geom_histogram() +
  labs(title = "Hindcast")
filter: removed 12,517,200 rows (30%), 28,860,240 rows remaining
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 318 rows containing non-finite outside the scale range
(`stat_bin()`).
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Seasonal pattern of temperature by source (env_df_2 here is still the
# salinity sample from above -- the temperature sample is built below).
# NOTE(review): geomean is computed but never used in the plot; consider
# removing it or plotting it instead of the raw temp points.
env_df_2 |>
  filter(year > 2010) |>
  mutate(geomean=mean(temp), .by = c(lat,lon, month, model)) |>
  ggplot() +
  geom_point(aes(month, temp, color = model)) +
  stat_smooth(aes(month, temp, color = model), method = "gam", formula = y ~ s(x)) 
filter: removed 59,330 rows (59%), 40,670 rows remaining
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

# Temperature model data: 2011+ only (unlike oxy/sal which use 1993+),
# NAs in temp dropped, subsampled, with time index and UTM coordinates.
env_df_2 <- env_df |>
  filter(year > 2010) |>
  drop_na(temp) |>
  slice_sample( n = 100000) |> # sample data as it is too much data to model
  mutate(yearmonth = ((year-1963)*12)+month) |> # a continuous time predictor for the AR process
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 27,384,768 rows (66%), 13,992,672 rows remaining
# Rebuild the SPDE mesh on the temperature sample (same 5-unit cutoff)
mesh <- make_mesh(env_df_2, c("X", "Y"), cutoff = 5)
This mesh has > 1000 vertices. Mesh complexity has the single largest influence
on fitting speed. Consider whether you require a mesh this complex, especially
for initial model exploration. Check `your_mesh$mesh$n` to view the number of
vertices.
# Temperature models mirror the oxygen models 1, 3 and 4 (model 2 skipped).

# Model 1: spatially varying model effect, year as factor
Mod_temp1 <- 
  sdmTMB(
  data = env_df_2,
  formula = temp ~ 0 + model + as.factor(year),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity")
)

# Model 3: spatially varying model effect, year and month as factor predictors
Mod_temp3 <- 
  sdmTMB(
  data = env_df_2,
  formula = temp ~ 0 + model + as.factor(year) + as.factor(month),
  mesh = mesh,
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Model 4: time-varying AR1 intercept over yearmonth plus spatially varying
# model effect (same structure as Mod_oxy4/Mod_sal4).
Mod_temp4 <-
  sdmTMB(
  data = env_df_2, 
  formula = temp ~ 0 + model,
  mesh = mesh,
  time_varying_type = "ar1",
  time_varying = ~1,
  time = "yearmonth",
  extra_time = c(719),
  spatial_varying = ~ 0 + model,
  spatial = "off",
  spatiotemporal = "off",
  family = gaussian(link = "identity"),
)

# Compare the three candidate temperature models by AIC (lower is better).
AIC(Mod_temp1)
[1] 514650.6
AIC(Mod_temp3) # this is the one.
[1] 479271.2
AIC(Mod_temp4)
[1] 479447.1
# Convergence / gradient / standard-error diagnostics for each fit.
sanity(Mod_temp1)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_temp3)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
sanity(Mod_temp4)
✔ Non-linear minimizer suggests successful convergence
✔ Hessian matrix is positive definite
✔ No extreme or very small eigenvalues detected
✔ No gradients with respect to fixed effects are >= 0.001
✔ No fixed-effect standard errors are NA
✔ No standard errors look unreasonably large
✔ No sigma parameters are < 0.01
✔ No sigma parameters are > 100
✔ Range parameter doesn't look unreasonably large
# Randomized-quantile residual check, kept for reference:
# Mod4_res <- residuals(Mod_temp4)
# qqnorm(Mod4_res[which(is.finite(Mod4_res))])
# qqline(Mod4_res[which(is.finite(Mod4_res))])

The model with month and year as factor predictors is best for temperature based on AIC, but it turns out it does not perform well spatially.

# Prediction grid: every unique hindcast coordinate replicated across March
# of each year 2011-2023, on the same yearmonth scale used when fitting
# (yearmonth = (year - 1963) * 12 + month).
env_df_predhind4 <- env_df |>
  filter( model == "hindcast") |>
  dplyr::distinct(lat, lon) |>
  replicate_df(time_name = "yearmonth", time_values = seq((2011-1963)*12+3,(2023-1963)*12+3, by=12)) |> # for march every year
  mutate( model = as_factor("hindcast"),
          month = 3) |>
  add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
# Use the extended data pred_hind4 to predict missing years.
# Mod_temp3 needs year/month columns, so year is reconstructed from yearmonth.
temp_predhind3 <- predict(Mod_temp3, newdata = env_df_predhind4 |> mutate(year = floor(1963+(yearmonth/12)), month = 3))

# Mod_temp4 uses yearmonth as its time variable directly.
temp_predhind4 <- predict(Mod_temp4, newdata = env_df_predhind4)

# Plot temperature models, both are great at yearly means...
# Compare the yearly mean predictions of Mod_temp3 and Mod_temp4 against the
# yearly March means of the raw data from both sources.
temp_predhind3 |> mutate(Mod = "Mod_temp3") |>
  bind_rows(temp_predhind4 |> mutate(Mod = "Mod_temp4", 
                                     year = floor(1963+(yearmonth/12)))) |>
  mutate(mean_est = mean(est), .by = c(year)) |>
  ggplot() +
  geom_line(aes(year, mean_est, linetype = Mod)) +
  geom_line(data = env_df |> filter(month == 3, year > 1990) |> summarise(temp = mean(temp), .by = c(year,model)), aes(x = year, y = temp, color = model)) +
  labs(title = "Temperature model")
filter: removed 38,453,004 rows (93%), 2,924,436 rows remaining

# ...but spatially they are not good
# Spatial check for March 2017: side-by-side rasters of the raw hindcast data
# and the predictions of Mod_temp3 and Mod_temp4 on the same lon/lat grid.
env_df |>
  filter(year %in% c(2017),
         month == 3,
         model == "hindcast") |>
  mutate(source="data") |>
  bind_rows(temp_predhind3 |>
              mutate(source="model3",
                     temp=est) |>
              filter(year %in% c(2017))) |>
  bind_rows(temp_predhind4 |>
              mutate(year = floor(1963+(yearmonth/12)),
                     source="model4",
                     temp=est) |>
              filter(year %in% c(2017))) |>
  ggplot(aes(lon,lat, fill = temp)) +
  geom_raster() +
  facet_wrap(~source) +
  scale_fill_viridis()
filter: removed 41,359,140 rows (>99%), 18,300 rows remaining
filter: removed 219,600 rows (92%), 18,300 rows remaining
filter: removed 219,600 rows (92%), 18,300 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

The predictions don't look too good. Test more temperature models…

# The model is not capturing the spatial process. One direction is to add terms to the spatially varying, model-specific intercept. Some testing and reasoning leads me to believe that the month effect is important for the spatial process — within a year, temperature varies a lot, and this depends mainly on depth, which is a spatial, time-invariant variable.

# test a month smooth 
# Like Mod_temp3 but with month as a cyclic cubic smooth instead of a factor.
# Fix: removed the trailing comma after `family` — a trailing comma in an R
# call creates an empty argument bound positionally to the next unfilled formal.
Mod_temp3b <- 
  sdmTMB(
    data = env_df_2,
    formula = temp ~ 0 + model + as.factor(year) + s(month, bs = "cc"),
    mesh = mesh,
    spatial_varying = ~ 0 + model,
    spatial = "off",
    spatiotemporal = "off",
    family = gaussian(link = "identity")
  )

# factor month on spatial instead of fixed
# Month moved from the fixed effects into the spatially varying part, so the
# seasonal effect can differ across space.
# Fix: removed the trailing comma after `family` (empty-argument footgun).
Mod_temp3c <- 
  sdmTMB(
    data = env_df_2,
    formula = temp ~ 0 + model + as.factor(year),
    mesh = mesh,
    spatial_varying = ~ 0 + model + as.factor(month),
    spatial = "off",
    spatiotemporal = "off",
    family = gaussian(link = "identity")
  )

# Month both as a factor fixed effect AND spatially varying — the model that
# is ultimately selected and saved at the end of this script.
# Fix: removed the trailing comma after `family` (empty-argument footgun).
Mod_temp3cc <- 
  sdmTMB(
    data = env_df_2,
    formula = temp ~ 0 + model + as.factor(year) + as.factor(month),
    mesh = mesh,
    spatial_varying = ~ 0 + model + as.factor(month),
    spatial = "off",
    spatiotemporal = "off",
    family = gaussian(link = "identity")
  )
# mod 4 smooth month and year as time
# AR(1) yearly intercept plus a cyclic month smooth in the fixed effects.
# Fix: removed the trailing comma after `family` (empty-argument footgun).
Mod_temp4b <-
  sdmTMB(
    data = env_df_2,
    formula = temp ~ 0 + model + s(month, bs = "cc"),
    mesh = mesh,
    time_varying_type = "ar1",
    time_varying = ~1,
    time = "year",
    #extra_time = c(719),
    spatial_varying = ~ 0 + model,
    spatial = "off",
    spatiotemporal = "off",
    family = gaussian(link = "identity")
  )
# mod 4 month and year as time
# AR(1) yearly intercept with month handled in the spatially varying part.
# Fix: removed the trailing comma after `family` (empty-argument footgun).
Mod_temp4c <-
  sdmTMB(
    data = env_df_2,
    formula = temp ~ 0 + model,
    mesh = mesh,
    time_varying_type = "ar1",
    time_varying = ~1,
    time = "year",
    #extra_time = c(719),
    spatial_varying = ~ 0 + model + as.factor(month),
    spatial = "off",
    spatiotemporal = "off",
    family = gaussian(link = "identity")
  )
# Second round of AIC comparison: moving month into the spatially varying
# part (3c/3cc/4c) improves fit dramatically; Mod_temp3cc is lowest.
AIC(Mod_temp3b)
[1] 480198.5
AIC(Mod_temp3c)  
[1] 329642.8
AIC(Mod_temp3cc) # lowest
[1] 329554.3
AIC(Mod_temp4b)
[1] 480256.2
AIC(Mod_temp4c)
[1] 329722.5

How does Mod_temp3cc look in spatial predictions?

temp_predhind <- predict(Mod_temp3cc, newdata = env_df_predhind4 |> mutate(year = floor(1963+(yearmonth/12)), month = 3))

# Plot temperature models, looks good
# Yearly mean of the Mod_temp3cc predictions against the yearly March means
# of the raw data from both sources.
# Fix: the x aesthetic was `month + year`, which shifted the prediction line
# 3 units right (month is constant 3 here) relative to the overlaid data
# plotted at x = year. Plot both series on the same year axis.
temp_predhind |>
  mutate(yearmonth = ((year-1963)*12)+month,
         mean_est = mean(est), .by = c(yearmonth)) |>
  ggplot() +
  geom_line(aes(year, mean_est)) +
  geom_line(data = env_df |> filter(month == 3, year > 1990) |> summarise(temp = mean(temp), .by = c(year,model)), aes(x = year, y = temp, color = model)) +
  labs(title = "Temperature model")
filter: removed 38,453,004 rows (93%), 2,924,436 rows remaining

# and the spatial process looks much much better
# Spatial check for March 2017: raw hindcast data next to the Mod_temp3cc
# predictions on the same lon/lat grid.
env_df |>
  filter(year %in% c(2017),
         month == 3,
         model == "hindcast") |>
  mutate(source="data") |>
  bind_rows(temp_predhind |>
              mutate(source="model3",
                     temp=est) |>
              filter(year %in% c(2017))) |>
  ggplot(aes(lon,lat, fill = temp)) +
  geom_raster() +
  facet_wrap(~source) +
  scale_fill_viridis()
filter: removed 41,359,140 rows (>99%), 18,300 rows remaining
filter: removed 219,600 rows (92%), 18,300 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

5. Combine, plot and save predictions for 2018-2024 with the hindcast data

# combine oxy, salinity and temperature predictions
# Model predictions (March, years > 2017) are joined on lat/lon/year and
# appended to the raw hindcast values for years < 2018. Year is reconstructed
# from the yearmonth index (March ⇒ yearmonth = (year-1963)*12 + 3, so
# year = 1963 + (yearmonth-3)/12).
# NOTE(review): the final lon > 13.5 crop drops the westernmost strip —
# confirm this matches the study area used in scripts 03/04.
env_df_comb <-
oxy_predhind4 |>
  mutate(oxy = est,
         year = 1963+(yearmonth-3)/12) |>
  dplyr::select("lat", "lon", "year", "oxy") |>
  dplyr::left_join(sal_predhind |> mutate(sal = est, year = 1963+(yearmonth-3)/12) |> dplyr::select("lat", "lon", "sal", "year")) |>
  dplyr::left_join(temp_predhind |> mutate(temp = est, year = 1963+(yearmonth-3)/12) |> dplyr::select("lat", "lon", "temp", "year")) |> filter(year > 2017) |>
  bind_rows(env_df |> filter(year < 2018 & model == "hindcast")) |>
  dplyr::select("year","lat", "lon", "oxy", "temp", "sal") |>
  mutate(month = 3) |>
  filter(lon > 13.5)
Joining with `by = join_by(lat, lon, year)`
Joining with `by = join_by(lat, lon, year)`
filter: removed 311,100 rows (74%), 109,800 rows remaining
filter: removed 28,860,240 rows (70%), 12,517,200 rows remaining
filter: removed 416,760 rows (3%), 12,210,240 rows remaining
# plot
# Three maps combined with patchwork (`+` between plots): oxygen for 2019,
# salinity for 1963, temperature for 1999. Coordinates are converted to UTM33
# and scaled to metres (X*1000, Y*1000) to match plot_map_fc.
plot_map_fc +
  geom_point(data = env_df_comb |> filter(year == 2019, month == 3) |> add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633), aes(X*1000, Y*1000, color = oxy), alpha = 0.5) +
  theme_sleek(base_size = 6) + 
  geom_sf() +
  labs(title = "2019") +

plot_map_fc +
  geom_point(data = env_df_comb |> filter(year == 1963, month == 3) |> add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633), aes(X*1000, Y*1000, color = sal), alpha = 0.5) +
  theme_sleek(base_size = 6) + 
  geom_sf() +

plot_map_fc +
  geom_point(data = env_df_comb |> filter(year == 1999, month == 3) |> add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633), aes(X*1000, Y*1000, color = temp), alpha = 0.5) +
  theme_sleek(base_size = 6) + 
  geom_sf() 
filter: removed 12,192,544 rows (>99%), 17,696 rows remaining
filter: removed 11,997,888 rows (98%), 212,352 rows remaining
filter: removed 11,997,888 rows (98%), 212,352 rows remaining
Warning: Removed 6525 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 78300 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 78300 rows containing missing values or values outside the scale range
(`geom_point()`).

# Oxygen rasters across the data/prediction transition (2014-2021).
env_df_comb |>
   filter(year %in% c(2014:2021)) |>
   ggplot(aes(lon, lat, fill = oxy)) +
   geom_raster() +
   facet_wrap(~year, nrow = 2) +
   scale_fill_viridis()
filter: removed 11,290,048 rows (92%), 920,192 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

# Salinity rasters across the data/prediction transition (2014-2021).
env_df_comb |>
   filter(year %in% c(2014:2021)) |>
   ggplot(aes(lon, lat, fill = sal)) +
   geom_raster() +
   facet_wrap(~year, nrow = 2) +
   scale_fill_viridis()
filter: removed 11,290,048 rows (92%), 920,192 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

# Temperature rasters across the data/prediction transition (2014-2021).
env_df_comb |>
   filter(year %in% c(2014:2021)) |>
   ggplot(aes(lon, lat, fill = temp)) +
   geom_raster() +
   facet_wrap(~year, nrow = 2) +
   scale_fill_viridis()
filter: removed 11,290,048 rows (92%), 920,192 rows remaining
Warning: Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.
Raster pixels are placed at uneven horizontal intervals and will be shifted
ℹ Consider using `geom_tile()` instead.

Save models and use them when extracting data for the stomach data and pred_grid (scripts 03 and 04)

#save
# Persist the selected oxygen, salinity and temperature models so that
# scripts 03 (stomach covariates) and 04 (prediction grid) can reuse them
# without refitting. Mod_oxy4 and Mod_sal4 are fitted earlier in this script.
saveRDS(Mod_oxy4, paste0(home, "/R/prepare-data/Mod_oxy.rds"))
saveRDS(Mod_sal4, paste0(home, "/R/prepare-data/Mod_sal.rds"))
saveRDS(Mod_temp3cc, paste0(home, "/R/prepare-data/Mod_temp.rds"))